**** Define macros ****

* Root folder holding the raw CSV extract, the value-label do-file and the
* crosswalk sub-folders. Set this before running.
global dir "YOUR PATH HERE"

* The import, do and save commands in this file refer to a LOCAL macro named
* dir. Only the global was defined, so the local expanded to an empty string
* and every path pointed at the filesystem root. Mirror the global into it.
local dir "${dir}"

local occdir "${dir}/xwalk_occ"
local inddir "${dir}/xwalk_ind"

* NOTE(review): the CSV is named nlsy79_ while everything else in this file
* (pubid_1997, the value-label do-file, the saved panel) is NLSY97 - confirm
* the file name is intended.
import delimited "`dir'/nlsy79_learning.csv", case(preserve) clear 
do "`dir'/nlsy97_value-labels.do"

** Code variables as missing if values are less than zero
* Only numeric variables are recoded: comparing a string variable with 0
* stops the do-file with a type-mismatch error, so strings are skipped.
ds, has(type numeric)
local numvars `r(varlist)'
foreach var of local numvars {
	replace `var'=. if `var'<0
}

** Individual ID codes
rename pubid_1997 pubid

** Demographics - coded to line up with the NLSY79 categories
* race: 1 = Hispanic, 2 = Black (mixed is coded as Black, following NLSY79),
* 3 = White Non-Hispanic; any other source value is left missing
gen race=cond(key_race_ethnicity_1997==2, 1, ///
	cond(inlist(key_race_ethnicity_1997,1,3), 2, ///
	cond(key_race_ethnicity_1997==4, 3, .)))
gen sex=key_sex_1997
gen age1997=key_age_1997

* Age by year: 1997 age plus the number of years elapsed, for every
* later year used in this file (annual to 2011, then odd years to 2019)
foreach y of numlist 1998/2011 2013(2)2019 {
	gen age`y'=age1997+(`y'-1997)
}
** Urbanicity and region
* One copy per year of the urban/rural, census-region and MSA variables,
* under the short stubs used by the reshape later in the file
foreach y of numlist 1997/2011 2013(2)2019 {
	gen urban`y'=cv_urban_rural_`y'
	gen div`y'=cv_census_region_`y'
	gen metro`y'=cv_msa_`y'
}

** Highest grade completed
* the value 95 is treated as not reported and left missing
gen educ=cvc_hgc_ever_xrnd if cvc_hgc_ever_xrnd!=95

** Employment since date of last interview (DLI)
* emp<year> copies yinc_1400 when it is 0 or 1 and is missing otherwise
foreach y of numlist 1997/2011 2013(2)2019 {
	gen emp`y'=yinc_1400_`y' if inlist(yinc_1400_`y',0,1)
}

*employer ID
*only first 5 to be consistent with 79 wave
local waves 1997/2011 2013(2)2019
foreach y of numlist `waves' {
	foreach z of numlist 1/5 {
		gen empID`z'_`y'=yemp_uid_0`z'_`y'
	}
}

*count number of unique jobs up to each year starting with 1997
* The varlist range below is positional: it relies on the empID variables
* having been created above in year order, so that everything between
* empID1_1997 and empID5_<year> belongs to that year or an earlier one.
* NOTE(review): rownvals() is presumably the egenmore (SSC) egen function;
* confirm it is installed.
foreach y of numlist `waves' {
	egen jobsnum`y'=rownvals(empID1_1997-empID5_`y')
}

local waves 1997/2011 2013(2)2019

*wage of job at interview
* taken from job 01; the source is divided by 100
* NOTE(review): presumably the source is recorded in cents - confirm
foreach y of numlist `waves' {
	gen wage`y'=cv_hrly_compensation_01_`y'/100
}

** Hourly rate of pay - by job
* same rescaling, kept separately for each of the first five jobs
foreach y of numlist `waves' {
	foreach z of numlist 1/5 {
		gen wage`z'_`y'=cv_hrly_compensation_0`z'_`y'/100
	}
}

** Hours per week
foreach y of numlist `waves' {
	foreach z of numlist 1/5 {
		gen hrs`z'_`y'=cv_hrs_per_week_0`z'_`y'
	}
}

** weeks worked
foreach y of numlist `waves' {
	foreach z of numlist 1/5 {
		gen weeks`z'_`y'=cv_wkswk_job_dli_0`z'_`y'
	}
}

* Hourly wage excluding those enrolled in school
local waves 1997/2011 2013(2)2019

* school<year> is 1 when the enrollment-status code is non-missing and at
* least 8, and 0 otherwise. The 1998-2004 rounds use the _edt_ variant of
* the enrollment variable.
foreach y of numlist `waves' {
	local src cv_enrollstat_`y'
	if inrange(`y',1998,2004) {
		local src cv_enrollstat_edt_`y'
	}
	gen school`y'=(`src'>=8) & (`src'!=.)
}

* schl_max: the most recent year with school==1, found by walking the
* years from newest to oldest and filling only respondents still missing
gen schl_max=.
foreach y of numlist 2019(-2)2011 2010(-1)1997 {
	replace schl_max=`y' if school`y'==1 & schl_max==.
}

* wage at interview, kept only for years in which school is 0
foreach y of numlist `waves' {
	gen wage_noschl`y'=wage`y' if school`y'==0
}

*wage and salary income
foreach y of numlist `waves' {
	gen incwage`y'=yinc_1700_`y'
}

** Occupation: Apply occ1990dd crosswalk (file occ2000_occ1990dd.dta)
* For each year and each of the first five jobs: temporarily name the
* job's occupation code occ, divide it by 10, merge the crosswalk on occ,
* apply the hand-assigned codes below, list the codes still unmatched,
* then give both variables their job/year names.
* NOTE(review): occ is divided by 10 without floor(), unlike the industry
* code further down - confirm the crosswalk keys expect this.
foreach y of numlist 1997/2011 2013(2)2019 {
	foreach z of numlist 1/5 {
		rename yemp_occode_2002_0`z'_`y' occ
		replace occ=occ/10
		merge m:1 occ using "`occdir'/occ2000_occ1990dd.dta", keep(master match) nogen

		* manual assignments; each occ value appears in exactly one line,
		* and they override whatever the crosswalk returned
		replace occ1990dd=47 if occ==150
		replace occ1990dd=59 if occ==134
		replace occ1990dd=68 if occ==123
		replace occ1990dd=214 if occ==194
		replace occ1990dd=228 if occ==296
		replace occ1990dd=326 if occ==521
		replace occ1990dd=426 if occ==386
		replace occ1990dd=439 if occ==416
		replace occ1990dd=469 if occ==467
		replace occ1990dd=472 if occ==602
		replace occ1990dd=498 if occ==383
		replace occ1990dd=533 if occ==884
		replace occ1990dd=599 if inlist(occ,617,802,678)
		replace occ1990dd=653 if occ==650
		replace occ1990dd=699 if inlist(occ,692,693,631)
		replace occ1990dd=749 if occ==844
		replace occ1990dd=779 if inlist(occ,812,890)
		replace occ1990dd=804 if occ==752
		replace occ1990dd=814 if inlist(occ,911,950,973,974)

		display "Occupations not matched to occ1990dd: `y'"
		levelsof occ if occ1990dd==.
		rename occ occ`z'_`y'
		rename occ1990dd occ1990dd`z'_`y'
	}
}

* year-level occupation is the occupation of job 1
foreach y of numlist 1997/2011 2013(2)2019 {
	gen occ1990dd`y'=occ1990dd1_`y'
}

** Industry: Apply ind6090 crosswalk (using ind00)
* Job 01 only. The code is cut to its leading digits with floor(x/10),
* two codes are remapped (48 to 49, 200 to 207) before the merge, and
* unmatched codes are listed for each year.
foreach y of numlist 1997/2011 2013(2)2019 {
	rename yemp_indcode_2002_01_`y' ind00
	replace ind00=floor(ind00/10)
	replace ind00=49 if ind00==48
	replace ind00=207 if ind00==200
	merge m:1 ind00 using "`inddir'/ind00.dta", keep(master match) nogen
	display "Industries not matched to ind6090: `y'"
	levelsof ind00 if ind6090==.
	rename ind00 ind`y'
	rename ind6090 ind6090`y'
}

*Create variable for first occupation after school enrollment
gen first_occ=.
* All-missing placeholders for two years that have no occupation variable,
* so the look-ahead below can reference them; dropped again at the end.
gen occ1990dd2012=.
gen occ1990dd2021=.

* Last school year 1997-2010: use the occupation one year later, or the
* one two years later when the first is missing
forvalues y=1997/2010 {
	local next=`y'+1
	local next2=`y'+2
	replace first_occ=cond(occ1990dd`next'==., occ1990dd`next2', occ1990dd`next') if schl_max==`y'
}
* Last school year 2011 onward: only every second year is available, so
* use the occupation two years later
forvalues y=2011(2)2019 {
	local next=`y'+2
	replace first_occ=occ1990dd`next' if schl_max==`y'
}
drop occ1990dd2012 occ1990dd2021

*Work experience by year

*egen wkexp_yr_pre=rowtotal(cvc_hours_wk_yr_all_85_xrnd- cvc_hours_wk_yr_all_96_xrnd)
*replace wkexp_yr_pre=wkexp_yr_pre/2080
*replace wkexp_yr_pre=0 if wkexp_yr_pre==.

* wkexp_yr<year>: annual hours divided by 2080, with missing hours counted
* as zero. The source variables carry a two-digit year (97 ... 20), taken
* here from the four-digit loop value.
forvalues y=1997/2020 {
	local yy=substr("`y'",3,2)
	gen wkexp_yr`y'=cvc_hours_wk_yr_et_`yy'_xrnd/2080
	replace wkexp_yr`y'=0 if wkexp_yr`y'==.
}

*Now do max of 40
* Cap annual hours at 2080 so that maxexp_yr<year> never exceeds 1.
* The cap is applied to exactly the hours variables read here. The earlier
* positional range (..._00_xrnd- ..._99_xrnd) depended on column order in
* the raw file and would also have capped any unrelated variable stored
* between those two columns.
forvalues y=1997/2020 {
	local yy=substr("`y'",3,2)
	local hrs cvc_hours_wk_yr_et_`yy'_xrnd
	replace `hrs'=2080 if `hrs'>2080 & `hrs'!=.
	gen maxexp_yr`y'=`hrs'/2080
	replace maxexp_yr`y'=0 if maxexp_yr`y'==.
}

*calculate cumulative work experience
* 1997 starts the running total; missing values were already set to zero
* above, so no further zero-fill is needed here
gen wkexp_yr_total1997=wkexp_yr1997
gen maxexp_yr_total1997=maxexp_yr1997

* Totals are accumulated over EVERY calendar year, including the even
* years after 2011, so that the odd-year totals contain the years between
forvalues y=1998/2019 {
	local prev=`y'-1
	gen wkexp_yr_total`y'=wkexp_yr`y'+wkexp_yr_total`prev'
}
forvalues y=1998/2019 {
	local prev=`y'-1
	gen maxexp_yr_total`y'=maxexp_yr`y'+maxexp_yr_total`prev'
}

* Drop the even years from 2012 on: no other year variable in this file
* exists for them, so they would only add empty panel years in the reshape.
* The totals stop at 2019, hence the shorter second range.
foreach x in wkexp maxexp {
	forvalues y=2012(2)2020 {
		drop `x'_yr`y'
	}
}
foreach x in wkexp maxexp {
	forvalues y=2012(2)2018 {
		drop `x'_yr_total`y'
	}
}
*calculate work experience up to age 18
* 1997 base value: that year's experience, set to zero when age1997 is
* above 18 (a missing age also compares as above 18) or when the
* experience value is missing
foreach kind in wkexp maxexp {
	gen `kind'_yr_youth1997=cond(age1997>18 | `kind'_yr1997==., 0, `kind'_yr1997)
}
* 1998-2003: previous youth total plus the current year, reset to zero
* in any year where the respondent is older than 18
* NOTE(review): the reset means the value is 0, not the total reached at
* age 18, once age exceeds 18 - confirm this is intended
foreach kind in wkexp maxexp {
	forvalues y=1998/2003 {
		local prev=`y'-1
		gen `kind'_yr_youth`y'=cond(age`y'>18, 0, `kind'_yr`y'+`kind'_yr_youth`prev')
	}
}
/*
*number of jobs held in each year
forvalues y=95(1)99 {
    gen jobs19`y'=cvc_ttl_job_yr_et_`y'_xrnd
}
forvalues y=0(1)9 {
    gen jobs200`y'=cvc_ttl_job_yr_et_0`y'_xrnd
}
forvalues y=10(1)20 {
    gen jobs20`y'=cvc_ttl_job_yr_et_`y'_xrnd
}
*ignore jobs below age 14
replace jobs1995=0 if age1997<16
replace jobs1996=0 if age1997<15
replace jobs1997=0 if age1997<14
replace jobs1998=0 if age1997<13
*total jobs held at each age
*/
** Social and Non-Cognitive Skill Measures

* Raw variables: give the survey items readable names
rename (ytel_tipia_000001_2008 ytel_tipia_000006_2008) (extraverted reserved)

rename (ysaq_282j_2002 ysaq_282k_2002 ysaq_282l_2002 ysaq_282m_2002 ysaq_282q_2002) ///
	(disorganized conscientious undependable thorough trusting)
rename (ytel_tipia_000003_2008 ytel_tipia_000008_2008) (disciplined careless)

* Social skills composite
* standardize both items, flip the sign of the reverse-worded one,
* average the two, and standardize the average again
local social_items extraverted reserved
foreach item of local social_items {
	egen `item'_std=std(`item'), mean(0) std(1)
}
gen animated_std=-reserved_std

egen soc_nlsy2=rowmean(extraverted_std animated_std)
egen soc_nlsy2_std=std(soc_nlsy2), mean(0) std(1)

* Non-cognitive skills composite
* same recipe over seven items, three of which are sign-flipped
local noncog_items disorganized conscientious undependable thorough trusting disciplined careless
foreach item of local noncog_items {
	egen `item'_std=std(`item'), mean(0) std(1)
}
gen organized_std=-disorganized_std
gen dependable_std=-undependable_std
gen careful_std=-careless_std

egen noncog=rowmean(organized_std conscientious_std dependable_std thorough_std trusting_std disciplined_std careful_std)
egen noncog_std=std(noncog), mean(0) std(1)




** Restrict sample to selected variables
* The wildcards keep every variable whose name starts with the given stub,
* not only the ones generated in this file (for example emp* also keeps
* the empID variables and any raw variable whose name starts with emp).
keep pubid race sex age* urban* div* metro* educ emp* wage* incwage* occ* jobsnum* ///
	ind* wkexp_yr* maxexp_yr* soc_nlsy2_std noncog_std *asvab* schl_max first_occ empID* hrs* weeks*

rename pubid id
** Reshape data as panel
* Wide to long: one row per id and year. Stubs ending in an underscore
* (empID1_ ... occ1990dd5_) are the per-job variables named stub_year.
* NOTE(review): no wide variable named empID<year> or occ<year> is created
* in the visible code, so the plain empID and occ stubs presumably come out
* of the reshape entirely missing - confirm.
reshape long age urban div metro emp wage wage_noschl incwage empID occ occ1990dd ind ind6090 wkexp_yr wkexp_yr_total wkexp_yr_youth maxexp_yr maxexp_yr_total maxexp_yr_youth jobsnum empID1_ empID2_ empID3_ empID4_ empID5_ wage1_ wage2_ wage3_ wage4_ wage5_ hrs1_ hrs2_ hrs3_ hrs4_ hrs5_ weeks1_ weeks2_ weeks3_ weeks4_ weeks5_ occ1_ occ2_ occ3_ occ4_ occ5_ occ1990dd1_ occ1990dd2_ occ1990dd3_ occ1990dd4_ occ1990dd5_, i(id) j(year)

* Strip the trailing underscore left on the per-job variables by the reshape
foreach x in empID wage hrs weeks occ occ1990dd {
    forvalues y=1(1)5 {
	    rename `x'`y'_ `x'`y'
	}
}
* NOTE(review): emp_start_ and emp_end_ variables are not created in the
* visible code; presumably raw variables retained by the emp* wildcard.
* drop stops with an error if none exist - confirm.
drop emp_start_* emp_end_*
* Youth experience is only built for 1997-2003 above, so later panel years
* come out of the reshape missing and are set to zero here
foreach x in wk max {
	replace `x'exp_yr_youth=0 if `x'exp_yr_youth==.
}
compress

tsset id year
sort id year

* No wide empID<year> variable is built before the reshape in this file, so
* the plain empID stub would be missing in every row. Fill it from job 1,
* the same job that supplies the year-level wage, occupation and industry.
* NOTE(review): assumes job 1 is the main job - confirm.
replace empID=empID1 if missing(empID)

* Tenure = years since the EARLIEST earlier panel row of the same person
* with the same employer, occupation, or both. The loop walks lags 1 to 20
* and later replaces overwrite earlier ones, so the largest matching lag
* wins.
* Stata evaluates missing==missing as true, so every match also requires
* the identifier to be observed; otherwise two waves with unknown employer
* or occupation would count as the same job.
gen emp_tenure=.
gen occ_tenure=.
gen emp_occ_tenure=.
forvalues lag=1/20 {
	replace emp_tenure=year-year[_n-`lag'] if empID==empID[_n-`lag'] & !missing(empID) ///
		& emp==1 & id==id[_n-`lag']
	replace occ_tenure=year-year[_n-`lag'] if occ1990dd==occ1990dd[_n-`lag'] & !missing(occ1990dd) ///
		& emp==1 & id==id[_n-`lag']
	replace emp_occ_tenure=year-year[_n-`lag'] if empID==empID[_n-`lag'] & occ1990dd==occ1990dd[_n-`lag'] ///
		& !missing(empID) & !missing(occ1990dd) & emp==1 & id==id[_n-`lag']
}

* Employed with no earlier match: tenure starts at zero. When the
* identifier itself is unknown, tenure is left missing rather than zero.
replace emp_tenure=0 if emp_tenure==. & emp==1 & !missing(empID)
replace occ_tenure=0 if occ_tenure==. & emp==1 & !missing(occ1990dd)
replace emp_occ_tenure=0 if emp_occ_tenure==. & emp==1 & !missing(empID) & !missing(occ1990dd)

/*
gen temp=.
forvalues y=1997(1)2019 {
	replace temp=`y' if emp==1 & year==`y'
}
bysort id: egen firstyr_lf=min(temp)
drop temp
gen work_exp=year-firstyr_lf
replace work_exp=. if emp==0 | emp==.
*/
*merge m:1 occ1990dd using "C:\Users\ddeming\Dropbox\Human Capital and Decisions\onet98_occ1990dd.dta", nogen

* Inflate to 2016 wages
* One multiplier per calendar year, 1997 first and 2019 last (23 values);
* 2016 is the base year with a factor of 1.00
local factors 1.49 1.47 1.44 1.40 1.35 1.34 1.30 1.28 1.24 1.19 1.17 1.12 1.12 1.09 1.08 1.05 1.03 1.01 1.01 1.00 0.98 0.96 0.95
foreach x in wage wage1 wage2 wage3 wage4 wage5 wage_noschl incwage {
	forvalues i=1/23 {
		local f : word `i' of `factors'
		local yr=1996+`i'
		replace `x'=`x'*`f' if year==`yr'
	}
}

*trim wages 
* hourly wages are clamped to the range 3 to 200; system-missing values
* are left untouched. wage_noschl and incwage are not trimmed.
foreach x in wage wage1 wage2 wage3 wage4 wage5 {
	replace `x'=cond(`x'<3, 3, cond(`x'>200 & `x'!=., 200, `x'))
}
*create panel for combining
* male is left missing when sex is missing, instead of being coded 0
gen male=sex==1 if !missing(sex)
* standardized ASVAB math/verbal percentile score; the raw score is dropped
egen iq_std=std(asvab_math_verbal_score_pct_1999), mean(0) std(1)
* cohort flag for stacking with other panels
gen nlsy97=1
drop asvab_math_verbal_score_pct_1999
compress

* Save under the global dir set at the top of the file. The local macro of
* the same name is never defined in this file, so the previous path
* expanded to the filesystem root. A forward slash is used because Stata
* accepts it on every platform.
save "${dir}/nlsy97_learning_panel.dta", replace
